package ca.pfv.spmf.algorithms.frequentpatterns.charm;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
* 
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
* 
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
* 
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import ca.pfv.spmf.datastructures.triangularmatrix.TriangularMatrix;
import ca.pfv.spmf.input.transaction_database_list_integers.TransactionDatabase;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids_bitset.Itemset;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids_bitset.Itemsets;

/**
 * This is an implementation of the CHARM-MFI algorithm (thesis of L. Szathmary, 2006) 
 * that is a simple extension that takes the output of CHARM as input and 
 * do post-processing to keep only maximal itemsets. <br/><br/>
 * 
 * But it was found that the Charm-MFI algorithm is actually incorrect so
 * this implementation was modified to fix the original algorithm and 
 * generate the correct result. <br/><br/>
 * 
 * Note that this algorithm is not very efficient because the maximal itemsets 
 * are generated by post-processing.<br/><br/>
 * 
 * Also, note that this version can save the result to a file
 * or keep it into memory if no output path is provided
 * by the user to the runAlgorithm() method.
 * 
 * @see TriangularMatrix
 * @see TransactionDatabase
 * @see Itemset
 * @see Itemsets
 * @see HashTable
 * @see AlgoCharm_Bitset
 * @author Philippe Fournier-Viger
 */
public class AlgoCharmMFI {
	
	/**  start time of the last execution */
	private long startTimestamp;
	/** end  time of the last execution */
	private long endTimestamp; 

	/** 
	 The  patterns that are found 
	 (if the user want to keep them into memory) */
	protected Itemsets maximalItemsets;
	/** object to write the output file */
	BufferedWriter writer = null; 
	
	/**
	 * Default constructor
	 */
	public AlgoCharmMFI() {
	}

	/**
	 * Run the CHARM-MFI algorithm.
	 * @param output an output file path or null if the user want to keep the result in memory.
	 * @param frequentClosed a set of frequent closed itemsets
	 * @return the set of maximal itemsets (if the user chose to keep the result in memory.
	 * @throws IOException An exception if an error occurs while writting the output to a file.
	 */
	public Itemsets runAlgorithm(String output, Itemsets frequentClosed) throws IOException {
		
		// if the user want to keep the result into memory
		if(output == null){
			writer = null;
	    }else{ // if the user want to save the result to a file
			writer = new BufferedWriter(new FileWriter(output)); 
		}

		// Initialize the structure to store maximal itemsets
		maximalItemsets =  frequentClosed;
		maximalItemsets.setName("FREQUENT MAXIMAL ITEMSETS");
		
		// record the start time of the algorithm
		startTimestamp = System.currentTimeMillis();

		// get the size of the largest closed itemset.
		int maxItemsetLength = frequentClosed.getLevels().size();
		
		// IMPORTANT NOTE : THE ORIGINAL ALGORITHM IS INCORRECT (IT
		// DOES NOT PRODUCE THE SET OF MAXIMAL ITEMSETS IN SOME CASES BECAUSE
		// IT ONLY COMPARES CLOSED ITEMSETS OF SIZE I WITH THOSE OF SIZE I+1. 
		// HOWEVER, IT IS POSSIBLE THAT AN ITEMSET IS NOT MAXIMAL BECAUSE
		// OF A CLOSED ITEMSET OF A SIZE LARGER THAN I+1. ):
		// TO FIX IT THE ALGORITHM, WE HAVE MODIFIED IT AS FOLLOWS:
		
		// For closed itemsets of size i=1 to the largest size
		for (int i = 1; i < maxItemsetLength - 1; i++) {
			// Get the itemsets of size i
			List<Itemset> ti = frequentClosed.getLevels().get(i);
			// For closed itemsets of size j = i+1 to the largest size
			for (int j = i+1; j < maxItemsetLength; j++) {
				// get itemsets of size j
				List<Itemset> tip1 = frequentClosed.getLevels().get(j);
				
				// Check which itemsets are maximals by comparing itemsets
				// of size i and i+1
				findMaximal(ti, tip1, frequentClosed);
			}
		}
		
		// If the user chose to save the output to a file
		if(writer != null){
			// For itemsets of size i = 1 to the maximum itemset length
			for(List<Itemset> level : maximalItemsets.getLevels()){
				// For each itemset of length i
				for(int i=0; i < level.size(); i++){
					Itemset itemset = level.get(i);
					// save the itemset an its support
					writer.write(itemset.toString() + " #SUP: "	+ itemset.getAbsoluteSupport());
					writer.newLine();
				}
			}
			writer.close();
		}

		endTimestamp = System.currentTimeMillis();
		// Return all frequent maximal itemsets found!
		return maximalItemsets; 
	}

	/**
	 * Check if itemsets of size i are closed by comparing them with
	 * itemsets of size j where j > i.
	 * @param ti  itemsets of size i
	 * @param tip1 itemsets of size j
	 * @param maximalItemsets the current set of maximal itemsets
	 */
	private void findMaximal(List<Itemset> ti, List<Itemset> tip1, Itemsets maximalItemsets) {
		// for each itemset of j
		for (Itemset itemsetJ : tip1) {
			// iterates over the itemsets of size i
			Iterator<Itemset> iter = ti.iterator();
			while (iter.hasNext()) {
				Itemset itemsetI = (Itemset) iter.next();
				// if the current itemset of size i is contained
				// in the current itemset of size J
				if (itemsetJ.containsAll(itemsetI) ) {
					// Then, it means that the itemset of size I is not maximal so we remove it
					iter.remove();
					// We decrease the current number of maximal itemsets.
					maximalItemsets.decreaseItemsetCount();
					// NOTE: IF WE WOULD LIKE TO MAKE THIS MORE EFFICIENT
					// WE COULD USE LINKED-LIST TO STORE ITEMSETS 
					// INSTEAD OF ARRAY LISTS.....
					// THE COST FOR THE REMOVE OPERATION MAY BE SMALLER...
				}	
			}
		}
	}

	/**
	 * Print statistics about the algorithm execution to System.out.
	 */
	public void printStats(int transactionCount) {
		System.out.println("=============  CHARM-MFI - STATS =============");
		long temps = endTimestamp - startTimestamp;
		System.out.println(" Transactions count from database : "
				+ transactionCount);
		System.out.println(" Frequent maximal itemsets count : "
				+ maximalItemsets.getItemsetsCount());
		System.out.println(" Total time ~ " + temps + " ms");
		System.out.println("===================================================");
	}

	/**
	 * Get the set of maximal itemsets found by Charm-MFI
	 * @return the set of maximal itemsets
	 */
	public Itemsets getItemsets() {
		return maximalItemsets;
	}
}
